import numpy as npimport matplotlib.pyplot as pltimport pandas as pdimport scipy.stats as statsfrom scipy.stats import lognormfrom numba import jitfrom sksurv.compare import compare_survivalfrom sksurv.nonparametric import kaplan_meier_estimator
Code
import gsc_model_functions as gmf
For this we need to load in the patient data and calualte the distribution params
Code
# load in the patient survival datahistoric_df = pd.read_csv("Rho_D_data.csv")censored_str ="Censorship (1=censored)"survival_str ="Overall Survival"# only keep patients that weren't censoredhistoric_df = historic_df[historic_df[censored_str] ==0]# cut off patients that had very high proliferation ratehistoric_df = historic_df[historic_df["PIHNA rho"] <100 ] historic_df[censored_str] =True# fit distribution to the datadist_name ='lognorm'# Replace with the desired distribution namedist =getattr(stats, dist_name)params = dist.fit(historic_df["PIHNA rho"])shape = params[0]loc = params[1]scale = params[-1]
Define a custom version of this function to make specific figs.
Code
def my_phase2_trial_fun(n_trials,n_patients,distinct_arms,rho_case,shape,loc,scale): # n_trials (int), number of virtual clinical trials to calculate average from.# n_patients (int), number of patients per phase 2 virtual trial# distinct_arms (bool), whether the BMP4 and noBMP4 arms should be distinct sub-populations# rho_case (int), how to select patients from the rho distributionfrom gsc_model_params import mu,eta,n,k,delta_s,delta_v,delta_m,delta_b,u_s,C,lam,Ps_max,Ps_min,psi,mv_rho_scale,ms_mv_scale,mv_rho_scale,ms_mv_scale,detect_threshold,death_threshold,detection_sensitivity,death_sensitivity,alpha_rho_scale,resection_to_RT_delay,t_RT_interval,t_RT_cycle,n_RT_repeat,n_RT_cycles,t_RT_wait,resect_fraction s0 =0.001# Initial GSCs u0 = np.zeros(n+1) u0[0] = s0 n0 =0.001 s0 =0.01*n0 # fraction of initial tumour v_ratio =1.95# ratio between successive compartments v0 = ((n0-s0)*(v_ratio-1)/(v_ratio**n-1))*(v_ratio**np.arange(n)) u0 = np.zeros(n+1) u0[0] = s0 u0[1:] = v0 psi_mean_values = [0, 0.5, 5, 10, 20, 30, 50] # mean psi value to generate samples from turn normal frac_succ = np.zeros(len(psi_mean_values)) # save the number of trials that are successful# set up time grid t_final =8000 dt =0.01 t = np.arange(0, t_final+dt/2, dt) save_data = np.zeros([n_trials*n_patients, 5]) # store all the data we need# for each trial find out p value p_values = np.zeros([len(psi_mean_values),n_trials]) BMP4_mean_survival = np.zeros([len(psi_mean_values),n_trials]) noBMP4_mean_survival = np.zeros([len(psi_mean_values),n_trials]) mean_psi = np.zeros([len(psi_mean_values),n_trials]) mean_rho_BMP4 = np.zeros([len(psi_mean_values),n_trials]) mean_rho_noBMP4 = np.zeros([len(psi_mean_values),n_trials]) all_rhos = np.zeros([len(psi_mean_values),n_trials*n_patients])# we want each patient to have a unique random seed so that across all simulations they get the same series of random numbers random_seeds = np.arange(0,n_patients,1) w =0for psi_mean in psi_mean_values:for trial inrange(n_trials):# for each trial generate a unique set of heterogenous patients# set up distinct sets for BMP and noBMP first, then overwrite the latter with the former if distinct sets are not required np.random.seed(trial) if psi_mean==0 : psi_samples_BMP4 = np.zeros(n_patients) psi_samples_noBMP4 = np.zeros(n_patients)else : psi_samples_BMP4 = gmf.trunc_norm(psi_mean,1,n_patients) psi_samples_noBMP4 = gmf.trunc_norm(psi_mean,1,n_patients) np.random.seed(trial) pro_rates_sampled_BMP4 = np.sort(lognorm.rvs(shape, loc, scale=scale, size=n_patients)) pro_rates_sampled_noBMP4 = np.sort(lognorm.rvs(shape, loc, scale=scale, size=n_patients))if rho_case==0 : ### consider four cases here beyond the base case:### 0) sample required n_patients pro_rates_sampled_BMP4 = np.sort(lognorm.rvs(shape, loc, scale=scale, size=n_patients)) pro_rates_sampled_noBMP4 = np.sort(lognorm.rvs(shape, loc, scale=scale, size=n_patients))elif rho_case==1 :### 1) sample 5x required, take the top 20% (n_patients) pro_rates_sampled_BMP4 = np.sort(lognorm.rvs(shape, loc, scale=scale, size=5*n_patients)) pro_rates_sampled_BMP4 = pro_rates_sampled_BMP4[-n_patients:] pro_rates_sampled_noBMP4 = np.sort(lognorm.rvs(shape, loc, scale=scale, size=5*n_patients)) pro_rates_sampled_noBMP4 = pro_rates_sampled_noBMP4[-n_patients:]elif rho_case==2 :### 2) sample 2x required, take the top 50% (n_patients) (draw from different distributions) pro_rates_sampled_BMP4 = np.sort(lognorm.rvs(shape, loc, scale=scale, size=2*n_patients)) pro_rates_sampled_BMP4 = pro_rates_sampled_BMP4[-n_patients:] pro_rates_sampled_noBMP4 = np.sort(lognorm.rvs(shape, loc, scale=scale, size=2*n_patients)) pro_rates_sampled_noBMP4 = pro_rates_sampled_noBMP4[-n_patients:]elif rho_case==3 :### 3) sample a distribution with 2x scale parameter (not enough to get much response) pro_rates_sampled_BMP4 = np.sort(lognorm.rvs(shape, loc, scale=2*scale, size=n_patients)) pro_rates_sampled_noBMP4 = np.sort(lognorm.rvs(shape, loc, scale=2*scale, size=n_patients))elif rho_case==4 :### 4) sample a distribution with 3x scale parameter (this is enough to get a significant response) pro_rates_sampled_BMP4 = np.sort(lognorm.rvs(shape, loc, scale=3*scale, size=n_patients)) pro_rates_sampled_noBMP4 = np.sort(lognorm.rvs(shape, loc, scale=3*scale, size=n_patients))elif rho_case==5 :### 5) sample 4x required, take the top 50% (2*n_patients) and split in two between BMP4 and noBMP4 pro_rates_sampled = np.sort(lognorm.rvs(shape, loc, scale=scale, size=4*n_patients)) pro_rates_sampled_BMP4 = pro_rates_sampled[-2*n_patients::2] pro_rates_sampled_noBMP4 = pro_rates_sampled[-(2*n_patients-1)::2]elif rho_case==7:### 7) sample 4x requiered, take the bottom 50% slowest proliferation rates and split them between BMP4 and noBMP4 pro_rates_sampled = np.sort(lognorm.rvs(shape, loc, scale=scale, size=4*n_patients)) pro_rates_sampled_BMP4 = pro_rates_sampled[1:2*n_patients:2] pro_rates_sampled_noBMP4 = pro_rates_sampled[0:2*n_patients-1:2]elif rho_case ==-100:### 6) sample 4x required, take the middle 50% (n_patients) and split in two between BMP4 and noBMP4 pro_rates_sampled = np.sort(lognorm.rvs(shape, loc, scale=scale, size=4*n_patients)) pro_rates_sampled_noBMP4 = pro_rates_sampled[20:20+(2*n_patients):2] pro_rates_sampled_BMP4 = pro_rates_sampled[21: 21+2*(n_patients):2]elif rho_case ==22: pro_rates_sampled = np.sort(lognorm.rvs(shape, loc, scale=scale, size=n_patients)) pro_rates_sampled_BMP4 = pro_rates_sampled pro_rates_sampled_noBMP4 = pro_rates_sampledifnot(distinct_arms) :#pro_rates_sampled_noBMP4 = np.sort(lognorm.rvs(shape, loc, scale=scale, size=2*n_patients))#pro_rates_sampled_noBMP4 = pro_rates_sampled_noBMP4[10:10+n_patients] pro_rates_sampled_BMP4 = pro_rates_sampled_noBMP4 psi_samples_noBMP4 = psi_samples_BMP4 all_rhos[w,trial*n_patients:(trial*n_patients+n_patients)] = pro_rates_sampled_BMP4 rad_on =1 BMP4_on =1 resect_on =1 q =0# loop variable BMP4_survival = np.zeros(n_patients) noBMP4_survival = np.zeros(n_patients)for psi, pro_r inzip(psi_samples_BMP4,pro_rates_sampled_BMP4): mv = mv_rho_scale*pro_r*np.ones(n) ms = ms_mv_scale*mv[0] mv[n-1] =0# calc alpha as proportional to rho alpha = gmf.calc_alpha_from_rho(pro_r) beta = gmf.calc_beta(alpha)# simulate the model u,N,VS,t,m,B,detect_size,detect_t,_,_ = gmf.simulate_model(t_final,dt,u0,psi,Ps_max,Ps_min,n,k,ms,mv,delta_s,delta_v,delta_m,delta_b,C,u_s,n_RT_repeat,n_RT_cycles,rad_on,alpha,beta,eta,mu,t_RT_wait,t_RT_interval,detect_threshold,detection_sensitivity,death_sensitivity,lam,resection_to_RT_delay,BMP4_on,death_threshold,resect_on,resect_fraction,random_seeds[q])# save the survival time of the BMP4 arm save_data[(trial*n_patients)+q,2] = t[-1]-detect_t# svae the trial number save_data[(trial*n_patients)+q,0] = trial# save the psi value save_data[(trial*n_patients)+q,1] = psi BMP4_survival[q] = t[-1]-detect_t q = q+1# for each of the patients run the same thing again but with no BMP4 to act as a virtual control rad_on =1 BMP4_on =0 resect_on =1 q =0# loop variablefor psi, pro_r inzip(psi_samples_noBMP4, pro_rates_sampled_noBMP4): mv = mv_rho_scale*pro_r*np.ones(n) ms = ms_mv_scale*mv[0] mv[n-1] =0# calc alpha as proportional to rho alpha = gmf.calc_alpha_from_rho(pro_r) beta = gmf.calc_beta(alpha) u,N,VS,t,m,B,detect_size,detect_t,_,_ = gmf.simulate_model(t_final,dt,u0,psi,Ps_max,Ps_min,n,k,ms,mv,delta_s,delta_v,delta_m,delta_b,C,u_s,n_RT_repeat,n_RT_cycles,rad_on,alpha,beta,eta,mu,t_RT_wait,t_RT_interval,detect_threshold,detection_sensitivity,death_sensitivity,lam,resection_to_RT_delay,BMP4_on,death_threshold,resect_on,resect_fraction,random_seeds[q])# save the survival time of the virtual control arm save_data[(trial*n_patients)+q,3] = t[-1]-detect_t# svae the trial number save_data[(trial*n_patients)+q,0] = trial noBMP4_survival[q] = t[-1]-detect_t q = q+1 BMP4_mean_survival[w,trial] = np.mean(BMP4_survival) noBMP4_mean_survival[w,trial] = np.mean(noBMP4_survival) mean_psi[w,trial] = np.mean(psi_samples_BMP4) mean_rho_BMP4[w,trial] = np.mean(pro_rates_sampled_BMP4) mean_rho_noBMP4[w,trial] = np.mean(pro_rates_sampled_noBMP4) survival_BMP4_df = pd.DataFrame(save_data, columns=['trial','psi','Survival_time_BMP4','Virtual_Control', 'Status'])# has to be set to boolean not just integer survival_BMP4_df['Status'] =True nrows =int(np.sqrt(n_trials)) ncols =int(n_trials/nrows) fig = plt.figure() gs = fig.add_gridspec(nrows, ncols, hspace=0, wspace=0) axs = gs.subplots(sharex=True, sharey=True)for i inrange(n_trials): df = survival_BMP4_df[survival_BMP4_df['trial']==i]# need to create a structed array: dtype = [('event_indicator', bool), ('time', float)] time = np.zeros([n_patients*2]) time[0:n_patients,] = np.array(df["Virtual_Control"]) time[n_patients:,] = np.array(df["Survival_time_BMP4"]) event_indicators = np.ones([n_patients*2]) structured_array = np.array( list(zip(event_indicators, time)) , dtype=dtype) group = np.zeros(n_patients*2) group[n_patients:] =1 chisq, p_value, stats1, covariance = compare_survival(y = structured_array, group_indicator= group, return_stats=True) p_values[w,i] = p_value time, survival_prob, conf_int = kaplan_meier_estimator(df["Status"], df["Virtual_Control"], conf_type="log-log") fig.axes[i].step(time, survival_prob, where="post") fig.axes[i].fill_between(time, conf_int[0], conf_int[1], alpha=0.25, step="post", label='_nolegend_') time, survival_prob, conf_int = kaplan_meier_estimator(df["Status"], df["Survival_time_BMP4"], conf_type="log-log") fig.axes[i].step(time, survival_prob, where="post") fig.axes[i].fill_between(time, conf_int[0], conf_int[1], alpha=0.25, step="post", label='_nolegend_')# fig.axes[i].ylim(0, 1) fig.axes[i].text(.9,.8,'p='+str('%.*g'% (2, p_value)),horizontalalignment='right',transform=fig.axes[i].transAxes, fontsize =14)if p_value <0.05 : fig.axes[i].set_facecolor('0.9') fig.supylabel("Survival", fontsize=20) fig.supxlabel("Time (day)", fontsize=20) fig.suptitle(r"Simulated survival BMP4 + RT, "+str(n_trials) +" trials", fontsize=20)# fig.subplots_adjust(bottom=0.2) fig.legend([r"Virtual control", r"Simulated BMP4 $\bar\psi = $"+str(psi_mean)],loc="upper left", ncol=1, fontsize=20) fig.tight_layout()# fraction of successful trials frac_succ[w] = np.sum(p_values[w,:] <0.05)/n_trials w = w+1return psi_mean_values,frac_succ,BMP4_mean_survival,noBMP4_mean_survival,mean_rho_BMP4,mean_rho_noBMP4,p_values
Distinct arms
In distincr arms we sample a number more than the required number of samples and then take a certain section (eg: fasters / slowest proliferating)
middle 50% proliferation rate stratified with identical proliferations rates for no-BMP4 ans BMP4 arms
Code
n_trials=20n_patients=20# important thing here is that distinct_arms is set to Falsepsi_mean_values,frac_succ,BMP4_mean_survival,noBMP4_mean_survival,mean_rho_BMP4,mean_rho_noBMP4,p_values = my_phase2_trial_fun(n_trials,n_patients, distinct_arms=True, rho_case=-100, shape=shape, loc=loc, scale=scale)frac_suc_mid_d = frac_succBM4_mean_survival_rand = BMP4_mean_survivalp_values_rand = p_valuesnoBMP4_mean_survival_rand = noBMP4_mean_survival
plt.figure()plt.plot(psi_mean_values,frac_suc_lower_d, 'o-') plt.plot(psi_mean_values,frac_succ_upper_d, 'o-')plt.plot(psi_mean_values,frac_suc_mid_d, 'o-') plt.xlabel(r"Mean $\psi$", fontsize=14)plt.ylabel("Fraction of successful trials", fontsize=14)plt.ylim(-0.1,1.1)plt.title('probablity of successful trial depends on patient statification, '+str(n_patients) +' patients', fontsize=14)plt.xticks(fontsize=12)plt.yticks(fontsize=12)plt.legend(['slow proliferation','fast proliferation', "medium proliferation"], fontsize=12)plt.tight_layout()plt.show()
Identical arms
in Identical arms we consider exactly the same proliferation rates for BMP4 and noBMP4 arms. Also all patients have identical psi vlaues in the different treatment arms.
middle 50% proliferation rate stratified with identical proliferations rates for no-BMP4 ans BMP4 arms
Code
n_trials=20n_patients=20# important thing here is that distinct_arms is set to Falsepsi_mean_values,frac_succ,BMP4_mean_survival,noBMP4_mean_survival,mean_rho_BMP4,mean_rho_noBMP4,p_values = my_phase2_trial_fun(n_trials,n_patients, distinct_arms=False, rho_case=-100, shape=shape, loc=loc, scale=scale)frac_suc_mid_I = frac_succBM4_mean_survival_rand = BMP4_mean_survivalp_values_rand = p_valuesnoBMP4_mean_survival_rand = noBMP4_mean_survival
plt.figure()plt.plot(psi_mean_values,frac_suc_lower_I, 'o-') plt.plot(psi_mean_values,frac_succ_upper_I, 'o-')plt.plot(psi_mean_values,frac_suc_mid_I, 'o-') plt.xlabel(r"Mean $\psi$", fontsize=14)plt.ylabel("Fraction of successful trials", fontsize=14)plt.ylim(-0.1,1.1)plt.title('probablity of successful trial depends on patient statification, '+str(n_patients) +' patients', fontsize=14)plt.xticks(fontsize=12)plt.yticks(fontsize=12)plt.legend(['slow proliferation','fast proliferation', "medium proliferation"], fontsize=12)plt.tight_layout()plt.show()
what if we just sample 20 patients and use identical control arm
Code
n_trials=20n_patients=20# important thing here is that distinct_arms is set to Falsepsi_mean_values,frac_succ,BMP4_mean_survival,noBMP4_mean_survival,mean_rho_BMP4,mean_rho_noBMP4,p_values = my_phase2_trial_fun(n_trials,n_patients, distinct_arms=False, rho_case=22, shape=shape, loc=loc, scale=scale)frac_suc_rand = frac_succBM4_mean_survival_rand = BMP4_mean_survivalp_values_rand = p_valuesnoBMP4_mean_survival_rand = noBMP4_mean_survival